*********************************************************************************************************************************************************************
* This Stata do-file contains code that generates all tables and figures in "Competition and Certification: Theory and Evidence from the Audit Market"
* There are 12 required data input files (XXX.dta).
* For data copyright issues, variable firm_id is a pseudo firm identifier. 


	* "audit_samp_prematching_yr2001.dta"  is a 20% random sample of the dataset used in the paper for Table 1 Panel A
	* "audit_samp_postmatching_yr2001.dta" is a 20% random sample of the dataset used in the paper for Table 1 Panel B
	* "audit_psmsample_main.dta" is a 20% random sample of the dataset used in the paper for Table 1 Panel C (loan level variables), Table 3, Table 4, Table 6, and Table 8
	* "audit_psmsample_firm.dta" is a 20% random sample of the dataset used in the paper for Table 1 Panel C (firm level variables)
	* "msa_validitytest.dta"  is a 20% random sample of the dataset used in the paper for Table 2 Columns (1)-(2)
	* "samp_validity_fee.dta" is a 20% random sample of the dataset used in the paper for Table 2 Column  (3)
	* "audit_samp_psm_aq_firmlevel" is a 20% random sample of the dataset used in the paper for Table 5
	* "audit_sampxs_impt.dta"  is a 20% random sample of the dataset used in the paper for Table 7 Panel A
	* "audit_sampxs_analy.dta" is a 20% random sample of the dataset used in the paper for Table 7 Panel B
	* "audit_sampxs_cfo.dta"   is a 20% random sample of the dataset used in the paper for Table 7 Panel C
	* "audit_placebosamp.dta"  is a 20% random sample of the dataset used in the paper for Table 9 
	* "audit_samp_figure.dta"  is the sample of the dataset used in the paper for Figure 1


	
* This program was run without errors in Stata 16.1. 
* Authors: Heng (Griffin) Geng, Cheng Zhang, and Frank S. Zhou
* Date Modified: December 2023
**********************************************************************************************************************************************************************


  clear all
  set more off
  global auditcomp "D:\Dropbox\GengZhangZhou\AuditorCompetition\RCFS"
  cd "$auditcomp"
  

******************************************************
**Table 1: Summary Statistics
******************************************************
  
//Panel A: Before matching

    use audit_samp_prematching_yr2001, clear
    eststo clear
    estpost sum lat roa tang da dummy_res if treat==1,detail
    esttab using table1a_treat.csv, replace label cells(" mean(fmt(3)) sd(fmt(3))  p50(fmt(3))" ) noobs
    estpost sum lat roa tang da dummy_res if treat==0,detail
    esttab using table1a_control.csv, replace label cells(" mean(fmt(3)) sd(fmt(3))  p50(fmt(3))" ) noobs
  
    eststo: reg lat        treat
	eststo: reg roa        treat
	eststo: reg tang       treat
	eststo: reg da         treat
	eststo: reg dummy_res  treat
	
    esttab using table1_testt_pre.csv,replace label stats(N r2, fmt(0 3)) cells(b(star fmt(3)) t(par([ ]) fmt(3))) starlevels( * 0.10 ** 0.05 *** 0.01)  keep(treat) nogap	
	

//Panel B: After matching

	use audit_samp_postmatching_yr2001, clear
    eststo clear
    estpost sum lat roa tang da dummy_res if treat==1,detail
    esttab using table1b_treat.csv, replace label cells(" mean(fmt(3)) sd(fmt(3))  p50(fmt(3))" ) noobs
    estpost sum lat roa tang da dummy_res if treat==0,detail
    esttab using table1b_control.csv, replace label cells(" mean(fmt(3)) sd(fmt(3))  p50(fmt(3))" ) noobs

    eststo: reg lat        treat
	eststo: reg roa        treat
	eststo: reg tang       treat
	eststo: reg da         treat
	eststo: reg dummy_res  treat
	
    esttab using table1_testt_post.csv,replace label stats(N r2, fmt(0 3)) cells(b(star fmt(3)) t(par([ ]) fmt(3))) starlevels( * 0.10 ** 0.05 *** 0.01)  keep(treat) nogap	
	
//Panel C: Summary Statistics

//loan level variables  

  use audit_psmsample_main, clear
  eststo clear
  
  estpost sum lspread loan_size lmaturity i_syndicate if siccd!=. &  lender!=""  ,detail   // industry-by-year bank-by-year
  esttab using table1c_loan.csv, replace label cells("count mean(fmt(3))  sd(fmt(3)) p25(fmt(3)) p50(fmt(3)) p75(fmt(3))" ) noobs

//firm level variables 
   
  use audit_psmsample_firm, clear
  eststo clear
  gen auditfee_1000=auditfee*1000   //multiple by 1000 for presentation
  
  estpost sum lat roa tang lever mb cf_vol auditfee_1000 zscore if lat!=. & cf_vol!=. & lever!=. & mb!=. & tang!=. & roa!=. & zscore!=. & auditfee!=. & siccd!=. ,detail
  esttab using table1c_firm.csv, replace label cells("count mean(fmt(3))  sd(fmt(3)) p25(fmt(3)) p50(fmt(3)) p75(fmt(3))" ) noobs
  

*******************************************************************************
**Table 2 Validity Tests
*******************************************************************************
//Columns (1)-(2): change in HHI and frequency of auditor switching from 2001 to 2002  

    use msa_validitytest, clear
    eststo clear
    eststo: reg d_msa_comp      treat        , cluster(msa)
    eststo: reg d_msa_switch    treat        , cluster(msa)
	
    esttab using table2_part1.csv,replace label stats(N r2, fmt(0 3)) cells(b(star fmt(3)) t(par([ ]) fmt(2))) starlevels( * 0.10 ** 0.05 *** 0.01) keep(treat) nogap
	
	
//Column 3: audit fees change
    eststo clear
	use samp_validity_fee, clear
    gen siccd3=int(siccd/10)
    eststo: areg fee_gratio  treat lat nsegment lever roa arin dummy_audit_chg big4 dummy_foreign,abs(siccd3) cluster(msa) 
   
    esttab using table2_part2.csv,replace label stats(N r2, fmt(0 3)) cells(b(star fmt(3)) t(par([ ]) fmt(2))) starlevels( * 0.10 ** 0.05 *** 0.01) keep(treat) nogap	

****************************************************************
******Table 3+4:  Baseline + Fixed effects Robustness
****************************************************************  

    use audit_psmsample_main, clear
    eststo clear
	 
	 gen siccd2=int(siccd/100)
    egen ind_yr=group(siccd2 fyear)
	egen state_yr=group(state fyear)
	egen bank_yr=group(lender fyear)
	 gen treat_post=treat*post
  eststo: reghdfe lspread treat_post treat post                                                                                                             , abs(ind_yr bank_yr msa) cluster(msa) keepsing
  eststo: reghdfe lspread treat_post treat post lat roa tang lever mb cf_vol auditfee         if cf_vol!=. & lever!=. & mb!=. & tang!=. & roa!=. & lat!=.   , abs(ind_yr bank_yr msa) cluster(msa) keepsing
  eststo: reghdfe lspread treat_post treat post lat roa tang lever mb cf_vol auditfee zscore  if cf_vol!=. & lever!=. & mb!=. & tang!=. & roa!=. & lat!=.   , abs(ind_yr bank_yr msa) cluster(msa) keepsing
  eststo: reghdfe lspread treat_post treat post lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate                                , abs(ind_yr bank_yr msa) cluster(msa) keepsing
 
    
     esttab using table3.csv, replace label stats(N r2, fmt(0 3)) cells(b(star fmt(3)) t(par([ ]) fmt(2))) starlevels( * 0.10 ** 0.05 *** 0.01) keep(treat_post treat post lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate ) nogap	 
 
****************************************************************
** Robustness: Firm FE
**************************************************************** 
eststo clear
   eststo: reghdfe lspread treat_post treat post lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate  , abs(firm_id msa) cluster(msa) keepsing
 
****************************************************
** Robustness: state-by-year FE
****************************************************

   eststo: reghdfe lspread treat_post treat post lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate  , abs(state_yr msa) cluster(msa) keepsing

 esttab using table4.csv, replace label stats(N r2, fmt(0 3)) cells(b(star fmt(3)) t(par([ ]) fmt(2))) starlevels( * 0.10 ** 0.05 *** 0.01) keep(treat_post treat post lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate ) nogap	 
  
**************************************************************
**Table 5: Audit Quality + Info Asymmetry Measures
************************************************************** 

     use audit_samp_psm_aq_firmlevel, clear
	 eststo clear
     gen siccd2=int(siccd/100)
    egen ind_yr=group(siccd2 fyear)
	 gen treat_post=treat*post
	 
	//Column 1: discretional accrual
    eststo: reghdfe da                   treat_post treat post lat roa lever mb auditfee, abs(ind_yr msa)   cluster(msa) keepsing
	
	//Column 2: accounting restatement
	eststo: reghdfe dummy_res ldummy_res treat_post treat post lat roa lever mb auditfee, abs(ind_yr msa)   cluster(msa) keepsing

	//Columns 3-4: Amihud illiquidity
	eststo: reghdfe amihud_orig          treat_post treat post lat roa lever mb auditfee, abs(ind_yr msa)   cluster(msa) keepsing
    eststo: reghdfe amihud_ref           treat_post treat post lat roa lever mb auditfee, abs(ind_yr msa)   cluster(msa) keepsing

 	esttab using table5_aq.csv, replace label stats(N r2, fmt(0 3)) cells(b(star fmt(3)) t(par([ ]) fmt(2))) starlevels( * 0.10 ** 0.05 *** 0.01) keep(treat_post treat post ) nogap


****************************************************************
**Table 6: Credit Risk: PVIOL and number of fin covenants
**************************************************************** 

    use audit_psmsample_main, clear
    eststo clear
  
  	  gen siccd2=int(siccd/100)
     egen ind_yr=group(siccd2 fyear)
	 gen treat_post=treat*post

//Column 1 PVIOL
  eststo: reghdfe pviol      treat_post treat post lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate, abs(ind_yr msa)   cluster(msa) keepsing

//Column 2 Num of financial covenant  
 eststo: ppmlhdfe num_fincov treat_post treat post lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate, abs(ind_yr msa)   cluster(msa) keepsing
 
 esttab using table6_creditrisk.csv, replace label stats(N r2, fmt(0 3)) cells(b(star fmt(3)) t(par([ ]) fmt(2))) starlevels( * 0.10 ** 0.05 *** 0.01) keep(treat_post treat post ) nogap


********************************************************************
**Table 7 XS: Client Importance & Analyst Following & Operating CF 
********************************************************************   

*************Panel A: Client Importance
eststo clear

    use audit_sampxs_impt, clear
    gen siccd2=int(siccd/100)
   egen ind_yr=group(siccd2 fyear)
   egen bank_yr=group(lender fyear)

  eststo: reghdfe lspread treat##post##impt_treat lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate, abs(ind_yr bank_yr msa) cluster(msa) keepsing
  
  esttab using table7_impt.csv,replace label stats(N r2, fmt(0 3)) cells(b(star fmt(3)) t(par([ ]) fmt(2))) starlevels( * 0.10 ** 0.05 *** 0.01)  nogap
  
  
**************Panel B: Analyst Following
eststo clear

   use audit_sampxs_analy, clear
   gen siccd2=int(siccd/100)
  egen ind_yr=group(siccd2 fyear)
  egen bank_yr=group(lender fyear)
  
  eststo: reghdfe lspread treat##post##analy_treat lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate, abs(ind_yr bank_yr msa) cluster(msa) keepsing
	
  esttab using table7_analy.csv, replace label stats(N r2, fmt(0 3)) cells(b(star fmt(3)) t(par([ ]) fmt(2))) starlevels( * 0.10 ** 0.05 *** 0.01) nogap
  
**************Panel C： Operating Cash Flow 
eststo clear

   use audit_sampxs_cfo, clear
   gen siccd2=int(siccd/100)
  egen ind_yr=group(siccd2 fyear)
  egen bank_yr=group(lender fyear)
  
  eststo: reghdfe lspread treat##post##cfo_treat lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate, abs(ind_yr bank_yr msa) cluster(msa) keepsing

  esttab using table7_cfo.csv, replace label stats(N r2, fmt(0 3)) cells(b(star fmt(3)) t(par([ ]) fmt(2))) starlevels( * 0.10 ** 0.05 *** 0.01) nogap


****************************************************************
**Table 8: Control for PCAOB offices and Exclude Houston MSA 
****************************************************************
     use audit_psmsample_main, clear
     eststo clear
	 
 gen dummy_pcaob=0
 replace dummy_pcaob=1 if msa=="0520" & year>=2003
 replace dummy_pcaob=1 if msa=="8872" & year>=2003
 replace dummy_pcaob=1 if msa=="1922" & year>=2003
 replace dummy_pcaob=1 if msa=="5602" & year>=2003
 replace dummy_pcaob=1 if msa=="7362" & year>=2003
 
  	  gen siccd2=int(siccd/100)
     egen ind_yr=group(siccd2 fyear)
	 egen state_yr=group(state fyear)
	 egen bank_yr=group(lender fyear)
	  gen treat_post=treat*post

// Column 1: Control for PCAOB locations

 eststo: reghdfe lspread treat_post treat post lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate dummy_pcaob  , abs(ind_yr bank_yr msa) cluster(msa) keepsing

 
// Column 2: Exclude Houston MSA 
     drop if city=="houston"  

 eststo: reghdfe lspread treat_post treat post lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate   , abs(ind_yr bank_yr msa) cluster(msa) keepsing

 
esttab using table8_pcaob.csv, replace label stats(N r2, fmt(0 3)) cells(b(star fmt(3)) t(par([ ]) fmt(2))) starlevels( * 0.10 ** 0.05 *** 0.01) keep(treat_post dummy_pcaob ) nogap


****************************************************************
**Table 9: Placebo Tests
****************************************************************
    use audit_placebosamp, clear
    eststo clear 
	 gen siccd2=int(siccd/100)
    egen ind_yr=group(siccd2 fyear)
	egen state_yr=group(state fyear)
	egen bank_yr=group(lender fyear)
 
  eststo: reghdfe lspread treat_post2006 treat post2006 lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate if inrange(fyear,2004,2008), abs(ind_yr bank_yr msa) cluster(msa)  keepsing  
  eststo: reghdfe lspread treat_post2007 treat post2007 lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate if inrange(fyear,2005,2009), abs(ind_yr bank_yr msa) cluster(msa)  keepsing  
  eststo: reghdfe lspread treat_post2008 treat post2008 lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate if inrange(fyear,2006,2010), abs(ind_yr bank_yr msa) cluster(msa)  keepsing  
  eststo: reghdfe lspread treat_post2009 treat post2009 lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate if inrange(fyear,2007,2011), abs(ind_yr bank_yr msa) cluster(msa)  keepsing  
  eststo: reghdfe lspread treat_post2010 treat post2010 lat roa tang lever mb cf_vol auditfee zscore loan_size lmaturity i_syndicate if inrange(fyear,2008,2012), abs(ind_yr bank_yr msa) cluster(msa)  keepsing  
	


  esttab using table9_placebo.csv,replace label stats(N r2, fmt(0 3)) cells(b(star fmt(3)) t(par([ ]) fmt(2))) starlevels( * 0.10 ** 0.05 *** 0.01) keep(treat_post2006 treat_post2007 treat_post2008 treat_post2009 treat_post2010) nogap


****************************************************************
**Figure 1: Dynamic effect of audit market competition
****************************************************************

	use audit_samp_figure, clear
	
twoway (connected b fyear, sort lcolor(navy) /// 
	mcolor(navy) msymbol(circle) cmissing(n)) /// 
	(rcap LB UB fyear, /// 
	lcolor(navy) lpattern(dash) msize(medium)), ///
	legend(off)  ///
	ytitle(Coefficient) ///
	ytitle(, height(5)) ///   
	xtitle(, height(5)) ///   
	xtitle("Fiscal Year") ///	
	xline(2001.5, lwidth(vthin) lpattern(dash) lcolor(teal)) ///
	yline(0, lwidth(vthin) lpattern(dash) lcolor(teal))  ///
	xlabel(1999(1)2004, labsize(small)) /// 
	xmtick(1999(1)2004, nolabels ticks) ///
	yscale(range(0 0.2)) 	///
	ylabel(-0.4(0.2)0.4, labsize(small)) /// 
	graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))














  

	